import pandas as pd
import numpy as np

# Spreadsheet column headers -> snake_case names used by the rest of the script.
raw_to_clean_columns = {
    'Precinct': 'precinct',
    'County': 'county',
    'Office': 'office',
    'Candidate Name': 'candidate',
    'Original Votes': 'original_votes',
    'Hand Counted Votes': 'audited_votes',
}

# Load the post-election review workbook. header=1 takes the column labels
# from the second spreadsheet row (row index 1, zero-based).
mn = pd.read_excel(
    '../original/2020-post-election-review-results.xlsx',
    header=1,
).rename(columns=raw_to_clean_columns)

# Drop the 'End of worksheet' marker row before deriving anything, so no
# arithmetic or string parsing runs on it. .copy() makes the filtered frame
# independent of the frame it was sliced from before columns are added.
mn = mn.loc[mn.precinct != 'End of worksheet'].copy()

# Constant identifiers applied to every row of this file.
mn['state'] = 'MINNESOTA'
mn['date'] = '2020-11-03'

# Text following "District " in the office name; NaN when the office name
# contains no "District " part. expand=False returns a Series (the default
# returns a one-column DataFrame), which is the natural value for a single
# column assignment.
mn['district'] = mn.office.str.extract(r'District (.*)', expand=False)

# Audited (hand-counted) votes minus the originally reported votes.
mn['diff'] = mn.audited_votes - mn.original_votes

# Strip the " District ..." suffix so only the base office name remains.
district_suffix = '( District .*)'
mn['office'] = mn['office'].str.replace(district_suffix, '', regex=True)

# Spreadsheet office titles -> standardized office labels. Offices that are
# not listed here are left unchanged.
office_map = {
    'U.S. President & Vice President': 'US PRESIDENT',
    'U.S. Representative': 'US HOUSE',
    'U.S. Senator': 'US SENATE',
}
mn['office'] = mn['office'].replace(office_map)

# County names are stored in upper case.
mn['county'] = mn['county'].str.upper()

# Upper-cased spreadsheet candidate strings -> standardized candidate labels.
# "<NAME> AND <NAME>" entries are shortened to the first name (with the
# periods dropped for Trump and Biden); the blank, over/defective and write-in
# rows get fixed labels. Names that are not listed here are left unchanged.
cand_map = {
    'DONALD J. TRUMP AND MICHAEL R. PENCE': 'DONALD J TRUMP',
    'JOSEPH R. BIDEN AND KAMALA HARRIS': 'JOSEPH R BIDEN',
    'ROQUE "ROCKY" DE LA FUENTE AND DARCY RICHARDSON': 'ROQUE "ROCKY" DE LA FUENTE',
    'HOWIE HAWKINS AND ANGELA WALKER': 'HOWIE HAWKINS',
    'KANYE WEST AND MICHELLE TIDBALL': 'KANYE WEST',
    'BROCK PIERCE AND KARLA BALLARD': 'BROCK PIERCE',
    'GLORIA LA RIVA AND LEONARD PELTIER': 'GLORIA LA RIVA',
    'ALYSON KENNEDY AND MALCOLM JARRETT': 'ALYSON KENNEDY',
    'JO JORGENSEN AND JEREMY "SPIKE" COHEN': 'JO JORGENSEN',
    'BLANK FOR OFFICE': 'UNDERVOTES',
    'OVER / DEFECTIVE FOR OFFICE': 'OVERVOTES',
    'WRITE-IN**': 'WRITEIN',
}

# Upper-case first so the comparison against the map keys is exact, then swap
# every whole-value match for its standardized label.
mn['candidate'] = mn['candidate'].str.upper().replace(cand_map)

# Standardized candidate label -> party label. The lookup is an exact string
# match against the cleaned `candidate` column; the vote-accounting rows
# (UNDERVOTES / OVERVOTES / WRITEIN) deliberately map to an empty party.
# NOTE(review): 'COLLIN C PETERSON' is keyed without a period and no step
# visible in this script strips one from that name -- confirm the cleaned
# candidate column spells it the same way, otherwise that candidate's party
# silently stays ''.
party_map = {
    'DONALD J TRUMP': 'REPUBLICAN',
    'JOSEPH R BIDEN': 'DEMOCRAT',
    'ROQUE "ROCKY" DE LA FUENTE': 'REPUBLICAN',
    'HOWIE HAWKINS': 'OTHER',
    'KANYE WEST': 'NONPARTISAN',
    'BROCK PIERCE': 'NONPARTISAN',
    'GLORIA LA RIVA': 'OTHER',
    'ALYSON KENNEDY': 'OTHER',
    'JO JORGENSEN': 'LIBERTARIAN',
    'UNDERVOTES': '',
    'OVERVOTES': '',
    'WRITEIN': '',
    'JUDITH SCHWARTZBACKER': 'OTHER',
    'PETE STAUBER': 'REPUBLICAN',
    'QUINN NYSTROM': 'DEMOCRAT',
    "KEVIN O'CONNOR": 'OTHER',
    'OLIVER STEINBERG': 'OTHER',
    'JASON LEWIS': 'REPUBLICAN',
    'TINA SMITH': 'DEMOCRAT',
    'TOM EMMER': 'REPUBLICAN',
    'TAWNJA ZAHRADKA': 'DEMOCRAT',
    'KENDALL QUALLS': 'REPUBLICAN',
    'DEAN PHILLIPS': 'DEMOCRAT',
    'SLATER JOHNSON': 'OTHER',
    'RAE HART ANDERSON': 'OTHER',
    'MICHELLE FISCHBACH': 'REPUBLICAN',
    'COLLIN C PETERSON': 'DEMOCRAT',
    'BILL ROOD': 'OTHER',
    'JIM HAGEDORN': 'REPUBLICAN',
    'DAN FEEHAN': 'DEMOCRAT',
    'ADAM CHARLES WEEKS': 'OTHER',
    'TYLER KISTNER': 'REPUBLICAN',
    'ANGIE CRAIG': 'DEMOCRAT',
    'MICHAEL MOORE': 'DEMOCRAT',
    'LACY JOHNSON': 'REPUBLICAN',
    'ILHAN OMAR': 'DEMOCRAT',
    'SUSAN SINDT': 'OTHER',
    'GENE RECHTZIGEL': 'REPUBLICAN',
    'BETTY MCCOLLUM': 'DEMOCRAT',
}

# One vectorized lookup instead of one full-column scan per map entry.
# Candidates missing from the map (and missing candidate values) come back
# as NaN from .map() and are filled with '' so the column matches the
# previous "default to empty string" behavior.
mn['party'] = mn['candidate'].map(party_map).fillna('')

# Columns kept in the cleaned file, in the order they are written out.
output_columns = [
    'precinct',
    'county',
    'office',
    'candidate',
    'original_votes',
    'audited_votes',
    'state',
    'date',
    'district',
    'party',
    'diff',
]
mn = mn[output_columns]

# Write the cleaned table; index=False leaves the DataFrame index out of the CSV.
mn.to_csv('../ready/mn_clean.csv', index=False)


